{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is ‘last_expr'\n",
    "\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('/home/gramener/CameraTraps')  # append this repo to PYTHONPATH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from collections import Counter, defaultdict\n",
    "from random import sample\n",
    "import math\n",
    "\n",
    "from tqdm import tqdm\n",
    "\n",
    "from data_management.megadb.schema import sequences_schema_check\n",
    "from data_management.annotations.add_bounding_boxes_to_megadb import *\n",
    "from data_management.megadb.converters.cct_to_megadb import make_cct_embedded, process_sequences, write_json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "path_to_output = '/home/gramener/rspb_megadb.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_name = 'RSPB'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# path to the CCT json, or a loaded json object\n",
    "path_to_image_cct = '/home/gramener/rspb.json'  # set to None if not available\n",
    "path_to_bbox_cct = None  # set to None if not available\n",
    "assert not (path_to_image_cct is None and path_to_bbox_cct is None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading image DB...\n",
      "Number of items from the image DB: 4858\n",
      "Number of images with more than 1 species: 0 (0.0% of image DB)\n",
      "No bbox DB provided.\n"
     ]
    }
   ],
   "source": [
    "embedded = make_cct_embedded(image_db=path_to_image_cct, bbox_db=path_to_bbox_cct)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The dataset_name is set to RSPB. Please make sure this is correct!\n",
      "Making a deep copy of docs...\n",
      "Putting 4858 images into sequences...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4858/4858 [00:00<00:00, 21687.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of sequences: 4858\n",
      "Checking the location field...\n",
      "Checking which fields in a CCT image entry are sequence-level...\n",
      "\n",
      "all_img_properties\n",
      "{'file', 'id', 'site', 'class', 'Detector', 'Fortnight'}\n",
      "\n",
      "img_level_properties\n",
      "{'file', 'image_id'}\n",
      "\n",
      "image-level properties that really should be sequence-level\n",
      "{'id', 'class', 'site', 'Detector', 'Fortnight'}\n",
      "\n",
      "Finished processing sequences.\n",
      "Example sequence items:\n",
      "\n",
      "{'seq_id': 'dummy_7453095a0ebb404d8997e52c4191c406', 'dataset': 'RSPB', 'images': [{'file': 'Fortnight13/ProsperousBay/Detector2428/PICT0006.JPG'}], 'id': 'Fortnight13/ProsperousBay/Detector2428/PICT0006', 'class': ['cat'], 'site': 'Man_and_Horse', 'Detector': 'Detector1002', 'Fortnight': '6'}\n",
      "\n",
      "[{'seq_id': 'dummy_0e5be32eb3ec4dcb9ad7189aa6983da4', 'dataset': 'RSPB', 'images': [{'file': 'Fortnight3/Man_and_Horse/Detector1012/PICT0455.JPG'}], 'id': 'Fortnight3/Man_and_Horse/Detector1012/PICT0455', 'class': ['rabbit'], 'site': 'Man_and_Horse', 'Detector': 'Detector1131', 'Fortnight': '15'}]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "sequences = process_sequences(embedded, dataset_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Verified that the sequence items meet requirements not captured by the schema.\n",
      "Verified that the sequence items conform to the schema.\n"
     ]
    }
   ],
   "source": [
    "sequences_schema_check.sequences_schema_check(sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "write_json(path_to_output, sequences)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
